Mapping unemployment ratio to AfD election results in German Wahlkreise

library(sf)
## Linking to GEOS 3.8.1, GDAL 3.2.1, PROJ 7.2.1
library(stringr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.0     ✓ dplyr   1.0.5
## ✓ tidyr   1.1.3     ✓ forcats 0.5.1
## ✓ readr   1.4.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readxl)
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
library(huxtable)
## 
## Attaching package: 'huxtable'
## The following object is masked from 'package:dplyr':
## 
##     add_rownames
## The following object is masked from 'package:ggplot2':
## 
##     theme_grey
library(broom)

Geo data

wahlkreise_shp <- st_read(my_path_wahlkreise)
## Reading layer `Geometrie_Wahlkreise_20DBT' from data source 
##   `/Users/duzhiting/Documents/GitHub/R-Prediction/btw21_geometrie_wahlkreise_shp/Geometrie_Wahlkreise_20DBT.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 299 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 280371.1 ymin: 5235856 xmax: 921120.1 ymax: 6101444
## Projected CRS: ETRS89 / UTM zone 32N
glimpse(wahlkreise_shp)
## Rows: 299
## Columns: 5
## $ WKR_NR    <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 1…
## $ WKR_NAME  <chr> "Flensburg – Schleswig", "Nordfriesland – Dithmarschen Nord"…
## $ LAND_NR   <chr> "01", "01", "01", "01", "01", "01", "01", "01", "01", "01", …
## $ LAND_NAME <chr> "Schleswig-Holstein", "Schleswig-Holstein", "Schleswig-Holst…
## $ geometry  <MULTIPOLYGON [m]> MULTIPOLYGON (((545529.8 60..., MULTIPOLYGON ((…
wahlkreise_shp %>%
  ggplot() +
  geom_sf()

wahlkreise_shp %>%
  ggplot() +
  geom_sf(fill = "grey40") +
  theme_void()

unemployment ratios

unemp_file <- "~/Documents/Github/R-Prediction/btw21_Strukturdaten.csv"

file.exists(unemp_file)
## [1] TRUE
unemp_de_raw <- read_delim(unemp_file,
    ";", escape_double = FALSE,
    locale = locale(decimal_mark = ",",
        grouping_mark = "."),
    trim_ws = TRUE,
    skip = 8)  # skipt the first 8 rows
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double(),
##   Land = col_character(),
##   `Wahlkreis-Nr.` = col_character(),
##   `Wahlkreis-Name` = col_character(),
##   `Fläche am 31.12.2019 (km²)` = col_number(),
##   Fußnoten = col_character()
## )
## ℹ Use `spec()` for the full column specifications.

we need to do some cleansing before we can work with this dataset.

unemp_names <- names(unemp_de_raw)

unemp_de <- unemp_de_raw

names(unemp_de) <- paste0("V",1:ncol(unemp_de))

The important columns are:

The important columns are:

unemp_de <- unemp_de %>%
  rename(state = V1,
         area_nr = V2,
         area_name = V3,
         for_prop = V8,
         pop_move = V11,
         pop_migr_background = V19,
         income = V26,
         unemp = V47)  

AfD election results

elec_results = read.csv2("~/Documents/Github/R-Prediction/kerg.csv", head = TRUE, sep="\t")
head(elec_results)
NrGebietWaehler_gueltige_Zweitstimmen_vorlauefigAfD3
1Flensburg – Schleswig18011210317
2Nordfriesland – Dithmarschen Nord1453878798
3Steinburg – Dithmarschen Süd13629911303
4Rendsburg-Eckernförde16206010564
5Kiel1559867654
6Plön – Neumünster1337219741
glimpse(wahlkreise_shp)
## Rows: 299
## Columns: 5
## $ WKR_NR    <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 1…
## $ WKR_NAME  <chr> "Flensburg – Schleswig", "Nordfriesland – Dithmarschen Nord"…
## $ LAND_NR   <chr> "01", "01", "01", "01", "01", "01", "01", "01", "01", "01", …
## $ LAND_NAME <chr> "Schleswig-Holstein", "Schleswig-Holstein", "Schleswig-Holst…
## $ geometry  <MULTIPOLYGON [m]> MULTIPOLYGON (((545529.8 60..., MULTIPOLYGON ((…

For each party, four values are reported:

primary vote, present election primary vote, previous election secondary vote, present election secondary vote, previous election

afd_prop <- elec_results %>%
  rename(afd_votes = AfD3,
         area_nr = Nr,
         area_name = Gebiet,
         votes_total = Waehler_gueltige_Zweitstimmen_vorlauefig) %>%
  mutate(afd_prop = afd_votes / votes_total) %>%
  na.omit
unemp_de$area_nr <- as.integer(unemp_de$area_nr)

In the previous step, we have selected the columns of interest, changed their name (shorter, English), and have computed the proportion of (valid) secondary votes in favor of the AfD.

Match unemployment and income to AfD votes for each Wahlkreis

wahlkreise_shp %>%
  left_join(unemp_de, by = c("WKR_NR" = "area_nr")) %>%
  left_join(afd_prop, by = "area_name") -> chloro_data
view(unemp_de)
view(wahlkreise_shp)
view(chloro_data)

Plot geo map with afd votes

chloro_data %>%
  ggplot() +
  geom_sf(aes(fill = afd_prop)) -> p1
p1

p1 + scale_fill_distiller(palette = "Spectral") +
  theme_void()

Geo map (of election areas) with unemployment map

chloro_data %>%
  ggplot() +
  geom_sf(aes(fill = unemp)) +
  scale_fill_distiller(palette = "Spectral") +
  theme_void() -> p2
p2

Concordance of AfD results and unemployment/income

Let’s compute the percent ranking for each of the variables of interest (AfD votes, unemployment ratio, and income). Then we can compute the concordance for each pair by simply computing the difference (or maybe absolute difference). After that, we plot this “concordance variables” as fill color to the map.

chloro_data %>%
  mutate(afd_rank = percent_rank(afd_prop),
         unemp_rank = percent_rank(unemp),
         income_rank = percent_rank(income)) %>%
  mutate(afd_income_diff = subtract(afd_rank, income_rank),
         afd_unemp_diff = subtract(afd_rank, unemp_rank)) -> chloro_data

Let’s check the first ranks for each of the variables of interest. AfD ranks first:

chloro_data %>%
  as.data.frame %>%
  select(area_name, afd_rank, afd_prop, unemp_rank, income_rank) %>%
  arrange(-afd_rank) %>%
  slice(1:5)
area_nameafd_rankafd_propunemp_rankincome_rank
Görlitz1    0.3210.8560.228
Sächsische Schweiz-Osterzgebirge0.9970.3150.4190.601
Bautzen I0.9930.3140.4330.255
Erzgebirgskreis I0.99 0.3020.3620.617
Mittelsachsen0.9860.2970.4130.403

Goerlitz leads. Unemployment “top” places:

chloro_data %>%
  as.data.frame %>%
  select(area_name, afd_prop, unemp_rank, income_rank) %>%
  arrange(-unemp_rank) %>%
  slice(1:5)
area_nameafd_propunemp_rankincome_rank
Gelsenkirchen0.126 1    0.0101
Duisburg I0.08260.9930.0134
Duisburg II0.12  0.9930.0134
Bremen II – Bremerhaven0.08840.9830.292 
Dortmund I0.069 0.9830.121 

Gelsenkirchen is ahead of this sad pack. And the lowest unemployment ranks are at:

chloro_data %>%
  as.data.frame %>%
  select(area_name, afd_prop, unemp_rank, income_rank) %>%
  arrange(unemp_rank) %>%
  slice(1:5)
area_nameafd_propunemp_rankincome_rank
Donau-Ries0.109 0      0.651
Erding – Ebersberg0.07260.003360.862
Biberach0.107 0.003360.466
Roth0.08460.0101 0.748
Mittelems0.05120.0134 0.305

And finale income, low 5 and top 5:

chloro_data %>%
  as.data.frame %>%
  select(area_name, afd_prop, unemp_rank, income_rank) %>%
  arrange(income_rank) %>%
  slice(c(1:5, 294:299))
area_nameafd_propunemp_rankincome_rank
Helmstedt – Wolfsburg0.09370.48  0      
Gifhorn – Peine0.09630.332 0.00336
Salzgitter – Wolfenbüttel0.09790.718 0.00671
Gelsenkirchen0.126 1     0.0101 
Duisburg I0.08260.993 0.0134 
München-West/Mitte0.04240.362 0.973  
Düsseldorf I0.04280.819 0.987  
Düsseldorf II0.05980.819 0.987  
Starnberg – Landsberg am Lech0.06110.07720.993  
Bad Tölz-Wolfratshausen – Miesbach0.07960.057 0.997  
München-Land0.05260.04361      
chloro_data %>%
  ggplot() +
  geom_sf(aes(fill = afd_unemp_diff)) +
  scale_fill_gradient2() +
  theme_void() -> p3
p3

The fill color denotes the difference between unemployment rank of a given area and its afd vote rank. For example, if area X has an unemployment rank of .5 (50%), it means that half of the areas in the country have a lower (higher) unemployment ratio, respectively (the median). Similarly, an AfD vote rank of .5 indicates the median position. The difference of these two figures is zero, indicating accordance or close match. Thus, figures around zero denote accordance or match. 1 (100%) of AfD vote rank indicates the area with the best AfD results (area with the most votes); similar reasoning applies for income and unemployment ratio.

Hence, numbers greater than zero indicate that the AfD scored better than it would be expected by the accordance-hypothesis.

Similarly, numbers smaller than zero indicate that the AfD scored better than it would be expected by the accordance-hypothesis.

Areas with (near) white filling provide some support for the accordance hypothesis. There are areas of this type, but it is not the majority. The vast majority of areas showed too much or too little AfD - relative to their unemployment ratio.

This reasonsing shows that the AfD received better results in southern and middle areas of Germany than it would be expected by the accordance hypothesis. In contrast, the more poorer northern areas voted for the AfD much less often than it would be expected by the accordance hypothesis.

Let’s look at the areas with minimal and maximal dis-accordance, out of curiosity.

chloro_data %>%
  as.data.frame %>%
  select(area_name, afd_unemp_diff, unemp, afd_prop) %>%
  arrange(afd_unemp_diff) %>%
  slice(c(1:5, 295:299)) %>% hux %>%
  add_colnames
area_nameafd_unemp_diffunempafd_prop
area_nameafd_unemp_diffunempafd_prop
Berlin-Friedrichshain-Kreuzberg – Prenzlauer Berg Ost-0.90310.60.0405
Köln II-0.8869.80.0287
Bremen I-0.87511.20.0528
Essen III-0.86811.50.0549
Berlin-Charlottenburg-Wilmersdorf-0.86210.60.0473
Schwäbisch Hall – Hohenlohe0.75 3.60.126 
Rottweil – Tuttlingen0.7543.70.132 
Neu-Ulm0.76 3.20.118 
Höxter – Gütersloh III – Lippe II    4.9     
Paderborn    5.9     
chloro_data %>%
  as.data.frame %>%
  select(area_name, afd_unemp_diff, unemp, afd_prop) %>%
  arrange(afd_unemp_diff) %>%
  filter(afd_unemp_diff > -0.05, afd_unemp_diff < .05) %>%
  hux %>%
  add_colnames
area_nameafd_unemp_diffunempafd_prop
area_nameafd_unemp_diffunempafd_prop
Rotenburg I – Heidekreis-0.0444 5.70.0795
Halle-0.0413 9.50.147 
Groß-Gerau-0.0337 6.10.087 
Augsburg-Stadt-0.0299 6.40.0906
Magdeburg-0.0245 9.20.15  
Segeberg – Stormarn-Mitte-0.0215 5  0.0728
Herzogtum Lauenburg – Stormarn-Süd-0.0146 5.10.0746
Celle – Uelzen-0.009646.40.0913
Harburg-0.008234.80.0722
Ludwigshafen/Frankenthal0.005128.10.117 
Nienburg II – Schaumburg0.006625.90.087 
Coesfeld – Steinfurt II0.006963.40.0456
St. Wendel0.0173 6.30.0923
Kreuznach0.0177 6.70.0978
Hochsauerlandkreis0.0222 4.80.0737
Hochtaunus0.0225 5.10.0775
Mecklenburgische Seenplatte I – Vorpommern-Greifswald II0.0264 9.90.228 
Leipzig I0.0393 8.30.154 
Siegen-Wittgenstein0.0439 6  0.0907
München-Land0.0476 3.50.0526
Altmark0.0496 8.70.189 

Similar story for income.

chloro_data %>%
  ggplot() +
  geom_sf(aes(fill = afd_income_diff)) +
  scale_fill_gradient2() +
  theme_void() -> p4
p4

The map shows a clear pattern: The eastern parts of Germany are far more afd-oriented than their income rank would predict (diff scores above zero, blue color). However, for some areas across the whole rest of the country, the likewise pattern is true too: A lot areas are rich and do not vote for the AfD (reddish color, diff score below zero). And, thirdly, a lot of aras support the accordance hypothesis (white color, diff score around zero).

More simple map

Maybe we should simplify the map: Let’s only distinguish three type of areas: too much AfD in comparison to the unemployment, too few AfD for the unemployment, or AfD at par with unemployment. Maybe the picture is more clearcut then.

chloro_data %>%
  select(afd_unemp_diff) %>%
  mutate(afd_unemp_diff_3g = cut_interval(afd_unemp_diff, n = 3,
         labels = c("AFD < Arbeitslosigkeit",
                    "AFD = Arbeitslosigkeit",
                    "AFD > Arbeitslosigkeit"))) %>%
  ggplot() +
  geom_sf(aes(fill = afd_unemp_diff_3g)) +
  labs(fill) +
  theme_void()

“AfD density”

In a similar vein, we could compute the ratio of AfD votes and unemployment. That would give us some measure of covariability. Let’s see

library(viridis)
## Loading required package: viridisLite
chloro_data %>%
  mutate(afd_dens = afd_prop / unemp) %>%
  ggplot +
  geom_sf(aes(fill = afd_dens)) +
  theme_void() +
  scale_fill_viridis()

The diagram shows that in relation to unemployment, the AfD votes are strongest in Sachsen. Don’t forget that this measure is an indication of co-occurence, not of absolute AfD votes.

Correlation of unemployment and AfD votes

A simple, straight-forward and well-known approach to devise assocation strength is Pearson’s correlation coefficient. Oldie but goldie. Let’s depict it.

chloro_data %>%
  select(unemp, afd_prop, income, area_name) %>%
  ggplot +
  aes(x = unemp, y = afd_prop) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).

chloro_data %>%
  select(unemp, afd_prop, income, area_name) %>%
  as.data.frame %T>%
  summarise(cor_afd_unemp = cor(afd_prop, unemp)) %>%
  do(tidy(cor.test(.$afd_prop, .$unemp)))
estimatestatisticp.valueparameterconf.lowconf.highmethodalternative
0.03560.6120.541295-0.07850.149Pearson's product-moment correlationtwo.sided

Regression residuals of predicting unemployment by afd_score

Let’s predict the AfD vote score taking the unemployment as an predictor. Then let’s plot the residuals to see how good the prediction is, ie., how close (or rather, far) the association of unemployment and AfD voting is.

chloro_data$afd_prop[is.na(chloro_data$afd_prop)] <- 0
view(chloro_data$afd_prop)
lm1 <- lm(afd_prop ~ unemp, data = chloro_data)

chloro_data %>%
  mutate(afd_lm1 = lm(afd_prop ~ unemp, data =  chloro_data)$residuals) -> chloro_data
chloro_data %>%
  select(afd_lm1) %>%
  ggplot() +
  geom_sf(aes(fill = afd_lm1)) +
  scale_fill_gradient2() +
  theme_void()

This model shows a clearcut picture: The eastern part is too “afd” for its unemployment ratio (some parts of east-southern Bavaria too); the west is less afd-ic than what would be expected by the unemployment. The rest (middle and south) parts over-and-above show the AfD levels that woul be expected by their unemployment.